Data preprocess for Bombus of Canada spatial analysis.
# Load libraries.
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: spatstat.univar
## spatstat.univar 3.1-2
## Loading required package: spatstat.geom
## spatstat.geom 3.3-6
## Loading required package: spatstat.random
## spatstat.random 3.3-3
## Loading required package: spatstat.explore
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## spatstat.explore 3.4-2
## Loading required package: spatstat.model
## Loading required package: rpart
## spatstat.model 3.3-5
## Loading required package: spatstat.linnet
## spatstat.linnet 3.2-5
##
## spatstat 3.3-2
## For an introduction to spatstat, type 'beginner'
library(sp)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(spatstat.geom)
library(RColorBrewer)
# Read occurrence data
bombus_data <- read_tsv("../data/0008996-250402121839773/occurrence.txt")
## Rows: 16088 Columns: 223
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (41): license, institutionCode, collectionCode, basisOfRecord, recorde...
## dbl (18): gbifID, catalogNumber, startDayOfYear, endDayOfYear, year, month...
## lgl (161): accessRights, bibliographicCitation, language, modified, publish...
## dttm (3): lastInterpreted, lastParsed, lastCrawled
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check data.
dim(bombus_data)
## [1] 16088 223
Filter to entries with coordinates and genus Bombus:
bombus_filtered <- bombus_data %>%
filter(!is.na(decimalLatitude), !is.na(decimalLongitude), genus == "Bombus")
Removed 282 records without coordinates and not belong to
Bombus.
Totally, there are 15806 bombus records.
Check specificEpithet:
Definition – Specific Epithet
In biological taxonomy, a specific epithet is the second part of the scientific name (binomial name) of a species. It identifies the species within a given genus.
unique_specific_epithet_all <- unique(bombus_filtered$specificEpithet)
There are 40 specific species in North America.
load('../data/BC_Covariates.Rda')
data_covariates <- DATA
load('../data/BC_Parks.Rda')
data_parks <- DATA
# Create the parks ppp object
ppp_parks <- ppp(
x = data_parks$Parks$X,
y = data_parks$Parks$Y,
window = as.owin(data_parks$Window))
# Add region information as marks
marks(ppp_parks) <- data_parks$Parks$Region
sapply(data_covariates, class)
## Window Elevation Forest HFI
## "SpatialPolygons" "im" "im" "im"
## Dist_Water
## "im"
summary(data_covariates)
## Length Class Mode
## Window 1 SpatialPolygons S4
## Elevation 10 im list
## Forest 10 im list
## HFI 10 im list
## Dist_Water 10 im list
par(mfrow = c(3,2))
plot(data_covariates$Window)
plot(data_covariates$Elevation)
plot(data_covariates$Forest)
plot(data_covariates$HFI)
plot(data_covariates$Dist_Water)
bombus_bc <- subset(bombus_filtered, stateProvince == "British Columbia")
# View(bombus_bc)
# 3185 223
dim(bombus_bc)
## [1] 3185 223
bumble_bc_percent <- (dim(bombus_bc)[1] / dim(bombus_filtered)[1]) * 100
print(paste(bumble_bc_percent, "% of bumble bee sightings across Canada occurred in BC."))
## [1] "20.1505757307352 % of bumble bee sightings across Canada occurred in BC."
There are 3185 bumble bee occurrences in BC provinces. We removed 12621 records.
specificEpithet:unique_specific_epithet_bc <- unique(bombus_bc$specificEpithet)
There are 30 specific species the province of British Columbia, accounting for 75 % of the total found in North America.
# Convert the window to an owin object
sf_obj <- st_as_sf(data_covariates$Window)
window <- as.owin(sf_obj)
plot(window, main="BC Province Window")
# Check dataset columns.
# colnames(bombus_bc)
# Convert the cleaned data frame to an sf object
data_sf <- st_as_sf(bombus_bc, coords = c("decimalLongitude", "decimalLatitude"), crs = 4326)
# Define the target projection
projected_args <- "+proj=aea +lat_0=45 +lon_0=-126 +lat_1=50 +lat_2=58.5 +x_0=1000000 +y_0=0 +datum=NAD83 +units=m +no_defs"
# Transform the coordinates
bombus_bc_projected <- st_transform(data_sf, crs = projected_args)
# View(bombus_bc_projected)
dim(bombus_bc_projected)
## [1] 3185 222
# Get coordinations
bombus_bc_projected_coords <- st_coordinates(bombus_bc_projected)
head(bombus_bc_projected_coords)
## X Y
## [1,] 1467661.3 470452.3
## [2,] 1467661.3 470452.3
## [3,] 1176146.2 399333.2
## [4,] 846420.5 1069165.6
## [5,] 829971.4 1060433.6
## [6,] 846420.5 1069165.6
# Convert to ppp object.
ppp_bombus <- ppp(
x = bombus_bc_projected_coords[, "X"],
y = bombus_bc_projected_coords[, "Y"],
window = window)
## Warning: 62 points were rejected as lying outside the specified window
## Warning: data contain duplicated points
# Plot bombus data
plot(ppp_bombus, main = 'Bumble Bee in BC Province')
## Warning in plot.ppp(ppp_bombus, main = "Bumble Bee in BC Province"): 62 illegal
## points also plotted
From the above plot warning, we can see there are 62 points outside the BC province window. There are also duplicates in our dataset.
# Filter out points outside the window
inside_window <- bombus_bc_projected_coords[, "X"] >= window$xrange[1] &
bombus_bc_projected_coords[, "X"] <= window$xrange[2] &
bombus_bc_projected_coords[, "Y"] >= window$yrange[1] &
bombus_bc_projected_coords[, "Y"] <= window$yrange[2]
marks_df <- data.frame(
specificEpithet = bombus_bc_projected$specificEpithet[inside_window],
locality = bombus_bc_projected$locality[inside_window]
)
# Convert to ppp object
ppp_bombus <- ppp(x = bombus_bc_projected_coords[inside_window, "X"],
y = bombus_bc_projected_coords[inside_window, "Y"],
marks = marks_df,
window = window)
## Warning: 46 points were rejected as lying outside the specified window
## Warning: data contain duplicated points
Remove duplicates.
# Remove duplicates
ppp_bombus <- ppp_bombus[!duplicated(ppp_bombus), ]
There are 2201 duplicated points. Now, there are 984 unique bomble bee datapoints left in BC province.
plot(unmark(ppp_bombus), main = "Bumble Bee Occurrences in BC")
# Extract levels and number of classes
epithet_levels <- unique(marks(ppp_bombus)$specificEpithet)
n_classes <- length(epithet_levels)
# Define color palette
colors <- colorRampPalette(brewer.pal(12, "Set3"))(n_classes)
# Define shape values (repeats if n_classes > 26)
all_shapes <- 0:25
shape_values <- rep(all_shapes, length.out = n_classes)
# Plot
plot(ppp_bombus,
which.marks = "specificEpithet",
main = "Specific Epithet of Bumble Bee in BC",
cols = colors,
pch = shape_values,
cex = 0.8,
legend = FALSE
)
# Add a custom legend
legend(
"topright",
legend = epithet_levels,
col = colors,
pch = shape_values,
cex = 0.6,
ncol = 2
)
Add Distance, Dist Water for marks.
length_max <- length(dist)
# Distance -- 984 data points, no missing records.
dist <- nndist(ppp_bombus)
# Dist Water -- 984 data points, no missing records.
dist_water <- data_covariates$Dist_Water[ppp_bombus]
# Add marks.
marks(ppp_bombus)$Dist <- dist
marks(ppp_bombus)$DistWater <- dist_water
head(marks(ppp_bombus))
Plot
# Plot Distance
plot(ppp_bombus, which.marks = "Dist", main = "Bumble Bee Distance")
# Plot Elevation.
plot(data_covariates$Elevation, main = "Elevation")
points(ppp_bombus$x, ppp_bombus$y,
pch = 16,
cex = 0.6,
col = "black"
);par(new=TRUE)
points(ppp_bombus$x, ppp_bombus$y,
pch = 16,
cex = 0.5,
col = "yellow"
)
# Plot Forest.
plot(data_covariates$Forest, main = "Forest")
points(ppp_bombus$x, ppp_bombus$y,
pch = 16,
cex = 0.6,
col = "yellow"
);par(new=TRUE)
points(ppp_bombus$x, ppp_bombus$y,
pch = 16,
cex = 0.5,
col = "black"
)
# Plot HFI
plot(data_covariates$HFI, main = "HFI")
points(ppp_bombus$x, ppp_bombus$y,
pch = 16,
cex = 0.6,
col = "black"
);par(new=TRUE)
points(ppp_bombus$x, ppp_bombus$y,
pch = 16,
cex = 0.5,
col = "yellow"
)
# Plot Dist_water
plot(data_covariates$Dist_Water, main = "Dist Water")
points(ppp_bombus$x, ppp_bombus$y,
pch = 16,
cex = 0.6,
col = "black"
);par(new=TRUE)
points(ppp_bombus$x, ppp_bombus$y,
pch = 16,
cex = 0.5,
col = "yellow"
)
Plot Park vs. Bumble bee occurrences.
# Plot the point pattern & assign to variable for specific information
ppp_parks_plot <- plot(ppp_parks,
main = "Parks VS. Bumble Bee Occurrences",
col = "grey90",
cols = brewer.pal(n = 5, name = "Set2"),
pch = c(15, 19, 18, 17, 20),
cex = 0.8,
legend = FALSE
)
# Add a custom legend with a title 'Region'
legend("topright",
legend = c("North", "Ok", "South", "Tc", "West", "Bumble Bee"),
title = "Marks",
col = c(brewer.pal(n = 5, name = "Set2"), "yellow"),
pch = c(15, 19, 18, 17, 16, 20),
cex = 0.8
)
# Plot the bumble bee data points
plot(ppp_bombus, add = TRUE, cols = "black", pch = 20, cex = 0.6)
## Plotting the first column of marks
plot(ppp_bombus, add = TRUE, cols = "yellow", pch = 20, cex = 0.3)
## Plotting the first column of marks
ppp_bombus objectsave(ppp_bombus, file = "../data/ppp_bombus.Rda")